library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggthemes)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
Descargar datos: datos
eukaryotes <- read_csv("data/eukaryotes.csv", col_names = TRUE)
## Parsed with column specification:
## cols(
## `#Organism Name` = col_character(),
## `Organism Groups` = col_character(),
## Strain = col_character(),
## BioSample = col_character(),
## BioProject = col_character(),
## Assembly = col_character(),
## Level = col_character(),
## `Size(Mb)` = col_double(),
## `GC%` = col_double(),
## Replicons = col_character(),
## WGS = col_character(),
## Scaffolds = col_double(),
## CDS = col_double(),
## `Release Date` = col_datetime(format = ""),
## `GenBank FTP` = col_character(),
## `RefSeq FTP` = col_character()
## )
eukaryotes %>% glimpse()
## Rows: 5,131
## Columns: 16
## $ `#Organism Name` <chr> "Emiliania huxleyi CCMP1516", "Arabidopsis thaliana…
## $ `Organism Groups` <chr> "Eukaryota;Protists;Other Protists", "Eukaryota;Pla…
## $ Strain <chr> "CCMP1516", NA, "A17", NA, NA, NA, NA, NA, NA, "B80…
## $ BioSample <chr> "SAMN02744062", "SAMN03081427", "SAMN02299339", "SA…
## $ BioProject <chr> "PRJNA77753", "PRJNA10719", "PRJNA10791", "PRJNA198…
## $ Assembly <chr> "GCA_000372725.1", "GCA_000001735.1", "GCA_00021949…
## $ Level <chr> "Scaffold", "Chromosome", "Chromosome", "Chromosome…
## $ `Size(Mb)` <dbl> 167.67600, 119.66800, 412.92400, 978.97200, 823.786…
## $ `GC%` <dbl> 64.5000, 36.0528, 34.0470, 35.1208, 35.7097, 0.0000…
## $ Replicons <chr> NA, "chromosome 1:NC_003070.9/CP002684.1; chromosom…
## $ WGS <chr> "AHAL01", NA, "APNO01", "ACUP02", "AEKE02", "FJWB02…
## $ Scaffolds <dbl> 7795, 7, 2187, 1191, 3224, 72295, 58, 279439, 598, …
## $ CDS <dbl> 38554, 48350, 57661, 71525, 36010, 0, 41070, 0, 584…
## $ `Release Date` <dttm> 2013-04-19, 2001-08-13, 2011-08-12, 2010-01-05, 20…
## $ `GenBank FTP` <chr> "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/372…
## $ `RefSeq FTP` <chr> "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/372…
eukaryotes %>% select(c("#Organism Name","Organism Groups","Strain","Size(Mb)","GC%","CDS")) -> eukaryotes_clean
eukaryotes_clean %>% head()
## # A tibble: 6 x 6
## `#Organism Name` `Organism Groups` Strain `Size(Mb)` `GC%` CDS
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania huxleyi CCM… Eukaryota;Protists;Othe… CCMP15… 168. 64.5 38554
## 2 Arabidopsis thaliana Eukaryota;Plants;Land P… <NA> 120. 36.1 48350
## 3 Medicago truncatula Eukaryota;Plants;Land P… A17 413. 34.0 57661
## 4 Glycine max Eukaryota;Plants;Land P… <NA> 979. 35.1 71525
## 5 Solanum lycopersicum Eukaryota;Plants;Land P… <NA> 824. 35.7 36010
## 6 Hordeum vulgare Eukaryota;Plants;Land P… <NA> 9789. 0 0
eukaryotes_clean %>% rename(organism="#Organism Name",groups="Organism Groups",genome_size_mb="Size(Mb)",gc_percent="GC%") %>% rename_with(tolower) -> eukaryotes_clean
eukaryotes_clean %>% head()
## # A tibble: 6 x 6
## organism groups strain genome_size_mb gc_percent cds
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania huxleyi… Eukaryota;Protists… CCMP15… 168. 64.5 38554
## 2 Arabidopsis thali… Eukaryota;Plants;L… <NA> 120. 36.1 48350
## 3 Medicago truncatu… Eukaryota;Plants;L… A17 413. 34.0 57661
## 4 Glycine max Eukaryota;Plants;L… <NA> 979. 35.1 71525
## 5 Solanum lycopersi… Eukaryota;Plants;L… <NA> 824. 35.7 36010
## 6 Hordeum vulgare Eukaryota;Plants;L… <NA> 9789. 0 0
eukaryotes_clean %>% separate(groups,c("domain","kingdom","class"),sep = ";") -> eukaryotes_clean
eukaryotes_clean %>% head()
## # A tibble: 6 x 8
## organism domain kingdom class strain genome_size_mb gc_percent cds
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Emiliania hux… Eukary… Protists Other … CCMP1… 168. 64.5 38554
## 2 Arabidopsis t… Eukary… Plants Land P… <NA> 120. 36.1 48350
## 3 Medicago trun… Eukary… Plants Land P… A17 413. 34.0 57661
## 4 Glycine max Eukary… Plants Land P… <NA> 979. 35.1 71525
## 5 Solanum lycop… Eukary… Plants Land P… <NA> 824. 35.7 36010
## 6 Hordeum vulga… Eukary… Plants Land P… <NA> 9789. 0 0
#ctrl + shift +m
eukaryotes_clean %>% count(kingdom) %>% arrange(desc(n))
## # A tibble: 5 x 2
## kingdom n
## <chr> <int>
## 1 Fungi 2844
## 2 Animals 1250
## 3 Protists 539
## 4 Plants 478
## 5 Other 20
eukaryotes_clean %>% group_by(kingdom) %>% summarise(mean=mean(genome_size_mb),sd=sd(genome_size_mb),max=max(genome_size_mb),min=min(genome_size_mb))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 5
## kingdom mean sd max min
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Animals 1132. 1434. 32394. 0.663
## 2 Fungi 29.9 20.2 216. 2.08
## 3 Other 108. 129. 543. 12.1
## 4 Plants 1023. 2738. 27603. 0.986
## 5 Protists 46.7 57.1 808. 1.45
# geom_bar(stat = "count") vs geom_col(stat="identity")
# count(kingdom)
eukaryotes_clean %>% count(kingdom) %>% ggplot(., mapping = aes(x=reorder(kingdom,desc(n)),y=n,fill=kingdom, color=kingdom)) + geom_col(width = 0.001, alpha=0.7) + geom_point(size=4) + scale_fill_brewer(palette = "Set1") + scale_color_brewer(palette = "Set1") + geom_hline(yintercept = 250, color="red") + labs(x="Reino", y="Conteo", title = "Título") + coord_polar() + theme_cowplot() + theme(legend.text = element_text(size=7))
# 1. Agrupar y Reordenar
# 2. Rellenar
# 3. Modificar colores (fill, scale_fill_brewer(palette = "Set1"))
# 3. Añadir capas (p.ej. geom_hline)
# 4. Transparencia
# 5. Modificar etiquetas de los ejes usando labs(), (símbolos, ej.: expression(paste("Etiqueta ",mu)))
# 6. Modificar sistema de coordenadas: (coord_flip, coord_polar)
# 6. Modificar tema: theme_*, + theme(legend.text = element_text(size=7), legend.key.size = unit(0.5,"cm")
#eukaryotes_clean %>% count(kingdom) %>% ggplot(data = ., mapping = aes(x=reorder(kingdom,desc(n)),y=n,fill=kingdom)) + geom_col()
eukaryotes_clean %>% ggplot(.,aes(x=cds,y=genome_size_mb,fill=kingdom)) + geom_point() + facet_grid(~kingdom)
#library(plotly)
#ggplotly(grafica)
# 1. Reordenar: reorder()
# 2. Boxplot: geom_boxplot()
# 3. Rellenar y cambiar escala de color: fill=, scale_fill_brewer()
# 4. Cambiar escala (log10): scale_y_log10()
# 5. Invertir coordenadas: coord_flip()
# 6. Modificar tema: theme_*
# 7. Graficar por grupo: facet_wrap()
# 8. Asignar a una variable
# p. Versión interactiva con Plotly: ggplotly
#Haremos una versión utilizando geom_point en vez de geom_boxplot (para analizar los datos de cada organismo):
#Definir estructura de las etiquetas:
eukaryotes_clean %>% ggplot(data = ., mapping = aes(x=reorder(class,genome_size_mb),y=genome_size_mb, fill=kingdom, text=paste("Clase:", class,"\n Organismo", organism,"\nTamaño de Genoma:", genome_size_mb))) +
geom_point(alpha=0.7, color=NA) + #Añadir puntos, color=NA remueve el borde de cada uno
scale_fill_brewer( palette = "Set1") +
scale_y_log10() +
coord_flip() +
theme_bw() +
facet_wrap(~kingdom) +
labs(x="",y="Genome Size (Mb)", fill="") -> p
ggplotly(p, tooltip = c("text")) %>% layout(hoverlabel=list(bgcolor="white"))
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#ggplotly(p)
# Añadir etiquetas para cada organismo con información general:
# text= paste("Class: ",class,"\nOrganism: ",organism,"\nGenome size (Mb):", genome_size_mb)
# Modificar parámetro tooltip = c("text")
# Modificar menú de gráfica Plotly %>% config(displayModeBar = F)
# Modificar fondo de la etiqueta: %>% layout(hoverlabel=list(bgcolor="white"))